# Read all CSV files into separate DataFrames
import pandas as pd
import os
p23 = pd.read_csv('iprecipitation_2023.csv')
p22 = pd.read_csv('iprecipitation_2022.csv')
p21 = pd.read_csv('iprecipitation_2021.csv')
p20 = pd.read_csv('iprecipitation_2020.csv')
p19 = pd.read_csv('iprecipitation_2019.csv')
p18 = pd.read_csv('iprecipitation_2018.csv')
p17 = pd.read_csv('iprecipitation_2017.csv')
p16 = pd.read_csv('iprecipitation_2016.csv')
p15 = pd.read_csv('iprecipitation_2015.csv')
p14 = pd.read_csv('iprecipitation_2014.csv')
p13 = pd.read_csv('iprecipitation_2013.csv')
p12 = pd.read_csv('iprecipitation_2012.csv')
p11 = pd.read_csv('iprecipitation_2011.csv')
p10 = pd.read_csv('iprecipitation_2010.csv')
p9 = pd.read_csv('iprecipitation_2009.csv')
p8 = pd.read_csv('iprecipitation_2008.csv')
p7 = pd.read_csv('iprecipitation_2007.csv')
p6 = pd.read_csv('iprecipitation_2006.csv')
p5 = pd.read_csv('iprecipitation_2005.csv')
p4 = pd.read_csv('iprecipitation_2004.csv')
p3 = pd.read_csv('iprecipitation_2003.csv')
mon = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec','lat','lon']
p3.columns = mon
p4.columns = mon
p5.columns = mon
p6.columns = mon
p7.columns = mon
p8.columns = mon
p9.columns = mon
p10.columns = mon
p11.columns = mon
p12.columns = mon
p13.columns = mon
p14.columns = mon
p15.columns = mon
p16.columns = mon
p17.columns = mon
p18.columns = mon
p19.columns = mon
p20.columns = mon
p21.columns = mon
p22.columns = mon
p23.columns = mon
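# A minimal, equivalent sketch of a loop-based alternative (assuming the same
# 'iprecipitation_<year>.csv' naming pattern). The explicit p3 ... p23 variables
# above are kept because the rest of the notebook refers to them by name.
frames = {year: pd.read_csv(f'iprecipitation_{year}.csv') for year in range(2003, 2024)}
for year in frames:
    frames[year].columns = mon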
import matplotlib.pyplot as plt
monthly_averages2023 = p23.mean()
monthly_averages2023 = monthly_averages2023.drop(['lat','lon'])
monthly_averages2022 = p22.mean()
monthly_averages2022 = monthly_averages2022.drop(['lat','lon'])
monthly_averages2021 = p21.mean()
monthly_averages2021 = monthly_averages2021.drop(['lat','lon'])
monthly_averages2020 = p20.mean()
monthly_averages2020 = monthly_averages2020.drop(['lat','lon'])
monthly_averages2019 = p19.mean()
monthly_averages2019 = monthly_averages2019.drop(['lat','lon'])
monthly_averages2018 = p18.mean()
monthly_averages2018 = monthly_averages2018.drop(['lat','lon'])
monthly_averages2017 = p17.mean()
monthly_averages2017 = monthly_averages2017.drop(['lat','lon'])
monthly_averages2016 = p16.mean()
monthly_averages2016 = monthly_averages2016.drop(['lat','lon'])
monthly_averages2015 = p15.mean()
monthly_averages2015 = monthly_averages2015.drop(['lat','lon'])
monthly_averages2014 = p14.mean()
monthly_averages2014 = monthly_averages2014.drop(['lat','lon'])
monthly_averages2013 = p13.mean()
monthly_averages2013 = monthly_averages2013.drop(['lat','lon'])
monthly_averages2012 = p12.mean()
monthly_averages2012 = monthly_averages2012.drop(['lat','lon'])
monthly_averages2011 = p11.mean()
monthly_averages2011 = monthly_averages2011.drop(['lat','lon'])
monthly_averages2010 = p10.mean()
monthly_averages2010 = monthly_averages2010.drop(['lat','lon'])
monthly_averages2009 = p9.mean()
monthly_averages2009 = monthly_averages2009.drop(['lat','lon'])
monthly_averages2008 = p8.mean()
monthly_averages2008 = monthly_averages2008.drop(['lat','lon'])
monthly_averages2007 = p7.mean()
monthly_averages2007 = monthly_averages2007.drop(['lat','lon'])
monthly_averages2006 = p6.mean()
monthly_averages2006 = monthly_averages2006.drop(['lat','lon'])
monthly_averages2005 = p5.mean()
monthly_averages2005 = monthly_averages2005.drop(['lat','lon'])
monthly_averages2004 = p4.mean()
monthly_averages2004 = monthly_averages2004.drop(['lat','lon'])
monthly_averages2003 = p3.mean()
monthly_averages2003 = monthly_averages2003.drop(['lat','lon'])
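# The same per-year monthly means can be computed in one pass over the frames
# dictionary sketched above: column means per month, with lat/lon dropped.
monthly_means = {year: frames[year].mean().drop(['lat', 'lon']) for year in range(2003, 2024)}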
# Plot the monthly mean values of the most recent years as line graphs
monthly_averages2023.plot(kind='line')
plt.show()
monthly_averages2022.plot(kind='line')
plt.show()
monthly_averages2021.plot(kind='line')
plt.show()
maindf = pd.concat([monthly_averages2003, monthly_averages2004, monthly_averages2005, monthly_averages2006, monthly_averages2007, monthly_averages2008, monthly_averages2009, monthly_averages2010, monthly_averages2011, monthly_averages2012, monthly_averages2013, monthly_averages2014, monthly_averages2015, monthly_averages2016, monthly_averages2017, monthly_averages2018, monthly_averages2019, monthly_averages2020, monthly_averages2021, monthly_averages2022, monthly_averages2023], axis=1)
maindf.columns = range(2003,2024)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
ls = [maindf.loc[month].mean() for month in months]
maindf['avg_of_all'] = ls
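# Equivalent vectorized form of the loop above (recomputes the same climatology):
maindf['avg_of_all'] = maindf[list(range(2003, 2024))].mean(axis=1)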
# Plot the 2003-2023 average of each month (the monthly climatology)
lsa= maindf['avg_of_all']
lsa.plot(kind='line')
plt.show()
sam = maindf['avg_of_all']
# Subtract the monthly climatology from every year column to get anomalies
amamoly = maindf.sub(sam, axis=0)
# Keep a reference to the raw monthly means as well
amamoly1 = maindf
# Plot the anomalies as a time series
amamoly.plot()
plt.show()
data = {
    'index': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    **{str(year): amamoly1[year] for year in range(2003, 2024)}
}
df = pd.DataFrame(data)
# Melt the DataFrame to long format
df_long = df.melt(id_vars=['index'], var_name='Year', value_name='Value')
# Convert 'index' to datetime
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')
# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], df_long['Value'], marker='', linestyle='-', color='black')
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
#write the data to csv file
df_long.to_csv('precipitation_anomalyINdo.csv', index=False)
data = {
    'index': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    **{str(year): amamoly[year] for year in range(2003, 2024)}
}
df = pd.DataFrame(data)
# Melt the DataFrame to long format
df_long = df.melt(id_vars=['index'], var_name='Year', value_name='Value')
# Convert 'index' to datetime
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')
# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], df_long['Value'], marker='', linestyle='-', color='black')
plt.fill_between(df_long['Date'], df_long['Value'],where= df_long['Value']>0, color='blue', alpha=0.6)
plt.fill_between(df_long['Date'], df_long['Value'],where= df_long['Value']<0, color='red', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def moving_average(data, window_size):
    return data.rolling(window=window_size, center=True).mean()

def kz_filter(data, window_size, iterations):
    # Kolmogorov-Zurbenko (KZ) filter: an iterated centred moving average
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result

# Apply KZ filter
window_size = 5
iterations = 3
df_longss = df_long.copy()
df_longss['Filtered_Value5'] = kz_filter(df_long['Value'], window_size, iterations)
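# Note: iterating a centred 5-point moving average three times means each output
# value draws on iterations*(window_size-1)+1 = 13 months of data, and the
# centred rolling mean leaves NaNs at both ends of the series, so the KZ curve
# starts and stops a few months inside the raw one.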
# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value5'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value5'],where=df_longss['Filtered_Value5']>0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value5'],where=df_longss['Filtered_Value5']<0, color='blue', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter (window size 5)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
# Apply KZ filter with a smaller window
window_size = 3
iterations = 3
df_longss['Filtered_Value3'] = kz_filter(df_long['Value'], window_size, iterations)
# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value3'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value3'],where=df_longss['Filtered_Value3']>0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value3'],where=df_longss['Filtered_Value3']<0, color='blue', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter (window size 3)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
# Apply KZ filter with a larger window
window_size = 7
iterations = 3
df_longss['Filtered_Value7'] = kz_filter(df_long['Value'], window_size, iterations)
# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value7'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value7'],where=df_longss['Filtered_Value7']>0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value7'],where=df_longss['Filtered_Value7']<0, color='blue', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter (window size 7)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
from scipy.ndimage import gaussian_filter1d
df_long = df_long.sort_values('Date')
# Apply Gaussian smoothing
sigma = 3 # Standard deviation for Gaussian kernel
smoothed_values1 = gaussian_filter1d(df_long['Value'], sigma=sigma)
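# Note: gaussian_filter1d uses scipy's defaults here (truncate=4.0, mode='reflect'),
# so the kernel extends roughly 4*sigma points to each side and the series is
# padded by reflection at both ends; unlike the centred rolling windows above,
# the smoothed curve therefore has no NaN gaps at the edges.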
# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing (sigma 3)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
from scipy.ndimage import gaussian_filter1d
df_long = df_long.sort_values('Date')
# Apply Gaussian smoothing
sigma = 5 # Standard deviation for Gaussian kernel
smoothed_values2 = gaussian_filter1d(df_long['Value'], sigma=sigma)
# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], smoothed_values2, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing (sigma 5)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
from scipy.ndimage import gaussian_filter1d
df_long = df_long.sort_values('Date')
# Apply Gaussian smoothing
sigma = 7 # Standard deviation for Gaussian kernel
smoothed_values3 = gaussian_filter1d(df_long['Value'], sigma=sigma)
# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], smoothed_values3, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing (sigma 7)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
from scipy.signal import convolve
def triangle_kernel(size):
kernel = np.zeros(size)
half_size = size // 2
for i in range(half_size + 1):
kernel[i] = i + 1
kernel[-(i + 1)] = i + 1
return kernel / kernel.sum()
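# For reference, the normalised tent-shaped weights this produces:
# triangle_kernel(3) -> [1, 2, 1]/4 and triangle_kernel(5) -> [1, 2, 3, 2, 1]/9,
# so the centre month is weighted most heavily.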
kernel_size = 3 # Adjust size as needed
kernel = triangle_kernel(kernel_size)
# Apply triangle smoothing
Tsmoothed_values = convolve(df_long['Value'], kernel, mode='same')
# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], Tsmoothed_values, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], Tsmoothed_values, where=Tsmoothed_values>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], Tsmoothed_values, where=Tsmoothed_values<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing (kernel size 3)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
# Triangle smoothing with a larger kernel
kernel_size = 5 # Adjust size as needed
kernel = triangle_kernel(kernel_size)
# Apply triangle smoothing
Tsmoothed_values5 = convolve(df_long['Value'], kernel, mode='same')
# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], Tsmoothed_values5, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], Tsmoothed_values5, where=Tsmoothed_values5>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], Tsmoothed_values5, where=Tsmoothed_values5<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing (kernel size 5)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
plt.legend()
# Show the plot
plt.grid(True)
plt.show()
# Triangle smoothing with a still larger kernel
kernel_size = 7 # Adjust size as needed
kernel = triangle_kernel(kernel_size)
# Apply triangle smoothing
Tsmoothed_values7 = convolve(df_long['Value'], kernel, mode='same')
# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], Tsmoothed_values7, color='black')
plt.fill_between(df_long['Date'], Tsmoothed_values7, where=Tsmoothed_values7>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], Tsmoothed_values7, where=Tsmoothed_values7<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing (kernel size 7)')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
import pandas as pd
# Assuming amamoly is already defined
pf = amamoly.drop('avg_of_all', axis=1)
# Calculate 3-month moving averages
m1 = (pf.loc['Jan'] + pf.loc['Feb'] + pf.loc['Mar']) / 3
m2 = (pf.loc['Feb'] + pf.loc['Mar'] + pf.loc['Apr']) / 3
m3 = (pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May']) / 3
m4 = (pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun']) / 3
m5 = (pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul']) / 3
m6 = (pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug']) / 3
m7 = (pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep']) / 3
m8 = (pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct']) / 3
m9 = (pf.loc['Sep'] + pf.loc['Oct'] + pf.loc['Nov']) / 3
m10 = (pf.loc['Oct'] + pf.loc['Nov'] + pf.loc['Dec']) / 3
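# A minimal sketch of the same 3-month seasonal means using a centred rolling
# window: the rows 'Feb' ... 'Nov' of pf_3mo match m1 ... m10 above (Jan and
# Dec are dropped because the centred window is incomplete there).
pf_3mo = pf.rolling(window=3, center=True).mean().dropna()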
# Concatenate the results into a single DataFrame
df = pd.concat([m1, m2, m3, m4, m5, m6, m7, m8, m9, m10], axis=1)
# Set appropriate column names
df.columns = range(1, 11)
# Add a 'Year' column
df['Year'] = range(2003, 2024)
# Transform the DataFrame from wide to long format
df_melted = df.melt(id_vars=["Year"], var_name="Month", value_name="Value")
# Sort the melted DataFrame by year and month
a = df_melted.sort_values(by=['Year', 'Month'])
a["Date"] = pd.to_datetime(a['Year'].astype(str) + '-' + a['Month'].astype(str), format='%Y-%m')
plt.figure(figsize=(18, 5))
# Plot the continuous line
plt.plot(a['Date'], a['Value'], linestyle='-', color='black')
plt.fill_between(a['Date'], a['Value'],where=a['Value']>0, color='r', alpha=0.6)
plt.fill_between(a['Date'], a['Value'],where=a['Value']<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('3-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
import pandas as pd
# Assuming amamoly is a DataFrame with appropriate data
pf = amamoly.drop('avg_of_all', axis=1)
# Calculate 5-month moving averages
m1 = (pf.loc['Jan'] + pf.loc['Feb'] + pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May']) / 5
m2 = (pf.loc['Feb'] + pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun']) / 5
m3 = (pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul']) / 5
m4 = (pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug']) / 5
m5 = (pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep']) / 5
m6 = (pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct']) / 5
m7 = (pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct'] + pf.loc['Nov']) / 5
m8 = (pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct'] + pf.loc['Nov'] + pf.loc['Dec']) / 5
# Concatenate the results into a single DataFrame
df = pd.concat([m1, m2, m3, m4, m5, m6, m7, m8], axis=1)
# Set appropriate column names
df.columns = range(1, 9)
# Add a 'Year' column
df['Year'] = range(2003, 2024)
# Transform the DataFrame from wide to long format
df_melted = df.melt(id_vars=["Year"], var_name="Month", value_name="Value")
# Sort the DataFrame by 'Year' and 'Month'
b = df_melted.sort_values(by=['Year', 'Month'])
# Convert 'Year' and 'Month' to datetime format
b["Date"] = pd.to_datetime(b['Year'].astype(str) + '-' + b['Month'].astype(str), format='%Y-%m')
# Print the first few rows of the melted DataFrame for verification
print(b.head())
#     Year  Month     Value       Date
# 0   2003      1 -0.019892 2003-01-01
# 21  2003      2 -0.037762 2003-02-01
# 42  2003      3 -0.060655 2003-03-01
# 63  2003      4 -0.050009 2003-04-01
# 84  2003      5 -0.052912 2003-05-01
plt.figure(figsize=(18, 5))
# Plot the continuous line
plt.plot(b['Date'], b['Value'], linestyle='-', color='black')
plt.fill_between(b['Date'], b['Value'],where=b['Value']>0, color='r', alpha=0.6)
plt.fill_between(b['Date'], b['Value'],where=b['Value']<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Date')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
# Read the Dipole Mode Index (DMI) data into a DataFrame
dmi = pd.read_csv('DMI.csv')
dmi['Date'] = pd.to_datetime(dmi['Date'], format='%Y-%m-%d')
plt.figure(figsize=(18, 5))
plt.plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
plt.fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
plt.title('Dipole Mode Index ')
plt.xlabel('Year')
plt.ylabel('DMI Value')
# Add gridlines
plt.grid(True)
# Show the plot
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Load the data
onia = pd.read_csv('oni_data.csv')
onia = onia[onia['Year'] > 2002]
# Create the plot
onia["Month"] = [i%12+1 for i in range(len(onia))]
onia.info()
plt.figure(figsize=(18, 5))
# Convert 'Year' and 'Month' to datetime format
onia["Date"] = pd.to_datetime(onia['Year'].astype(str) + '-' + onia['Month'].astype(str), format='%Y-%m')
# Plot the ONI data
plt.plot(onia['Date'], onia["ANOM"], color='black', marker='', label='ONI')
# Fill the positive and negative areas with different colors
plt.fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
plt.title('Oceanic Niño Index')
plt.xlabel('Year')
plt.ylabel('ONI Value')
# Add gridlines
plt.grid(True)
# Show the plot
plt.legend()
plt.show()
print(len(onia))
# onia.info() output: 257 rows; columns SEAS (object), Year (int64),
# Total (float64), ANOM (float64), Month (int64); len(onia) == 257
# Read the CSV file
PDO = pd.read_csv('PDO.csv')
print(PDO.head())
# Convert 'Date' to datetime format
PDO['Date'] = pd.to_datetime(PDO['Date'], format='%Y-%m-%d')
# Filter the DataFrame for the years 2003 to 2023
PDO = PDO[(PDO['Year'] > 2002) & (PDO['Year'] < 2024)]
# Plot the data of PDO
plt.figure(figsize=(18, 5))
plt.plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
plt.fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
plt.title('Pacific Decadal Oscillation')
plt.xlabel('Year')
plt.ylabel('PDO Value')
# Add gridlines
plt.grid(True)
# Show the plot
plt.legend()
plt.show()
#    Year Month   PDO       Date
# 0  1854   Jan  0.11 1854-01-01
# 1  1854   Feb -0.24 1854-02-01
# 2  1854   Mar -0.40 1854-03-01
# 3  1854   Apr -0.44 1854-04-01
# 4  1854   May -0.54 1854-05-01
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Assuming amamoly is already defined
data = {
    'index': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    **{str(year): amamoly[year] for year in range(2003, 2024)}
}
# Create DataFrame
df = pd.DataFrame(data)
# Melt the DataFrame to long format
df_longs = df.melt(id_vars=['index'], var_name='Year', value_name='Value')
# Convert 'index' to datetime
df_longs['Date'] = pd.to_datetime(df_longs['Year'] + '-' + df_longs['index'], format='%Y-%b')
# Sort by date to ensure continuous line plot
df_longs = df_longs.sort_values(by='Date')
# Calculate 3-month moving average
df_longs['3_month_MA'] = df_longs['Value'].rolling(window=3, min_periods=1).mean()
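# Note: rolling(window=3, min_periods=1) is a trailing average (each point uses
# the current and two previous months), unlike the centred windows in the KZ
# filter above. A centred variant would be, for example:
# df_longs['3_month_MA'] = df_longs['Value'].rolling(window=3, center=True, min_periods=1).mean()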
# Initialize the matplotlib figure
plt.figure(figsize=(18, 5))
# Plot the continuous line
plt.plot(df_longs['Date'], df_longs['3_month_MA'], marker='', linestyle='-', color='black')
plt.fill_between(df_longs['Date'], df_longs['3_month_MA'],where=df_longs['3_month_MA']>0, color='red', alpha=0.6,interpolate=True)
plt.fill_between(df_longs['Date'], df_longs['3_month_MA'],where=df_longs['3_month_MA']<0, color='blue', alpha=0.6,interpolate=True)
# Adding title and labels
plt.title('3-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
df = pd.DataFrame(data)
# Melt the DataFrame to long format
df_long = df.melt(id_vars=['index'], var_name='Year', value_name='Value')
# Convert 'index' to datetime
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')
# Sort by date to ensure continuous line plot
df_long = df_long.sort_values(by='Date')
# Calculate 5-month moving average
df_long['5_month_MA'] = df_long['Value'].rolling(window=5, min_periods=1).mean()
# Initialize the matplotlib figure
plt.figure(figsize=(18, 5))
# Plot the continuous line
plt.plot(df_long['Date'], df_long['5_month_MA'], marker='', linestyle='-', color='black')
plt.fill_between(df_long['Date'], df_long['5_month_MA'],where=df_long['5_month_MA']>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], df_long['5_month_MA'],where=df_long['5_month_MA']<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
# Calculate 7-month moving average
df_long['7_month_MA'] = df_long['Value'].rolling(window=7, min_periods=1).mean()
# Initialize the matplotlib figure
plt.figure(figsize=(18, 5))
# Plot the continuous line
plt.plot(df_long['Date'], df_long['7_month_MA'], marker='', linestyle='-', color='black')
plt.fill_between(df_long['Date'], df_long['7_month_MA'],where=df_long['7_month_MA']>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], df_long['7_month_MA'],where=df_long['7_month_MA']<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('Precipitation (mm/hr)')
# Show the plot
plt.grid(True)
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_longss['Date'], df_longss['Filtered_Value3'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
axs[1].fill_between(df_longss['Date'], df_longss['Filtered_Value3'], where=df_longss['Filtered_Value3'] > 0, color='red', alpha=0.6)
axs[1].fill_between(df_longss['Date'],df_longss['Filtered_Value3'], where=df_longss['Filtered_Value3'] < 0, color='blue', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with KZ Filter 3')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[1].legend()
# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_longss['Date'], df_longss['Filtered_Value5'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
axs[1].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] > 0, color='red', alpha=0.6)
axs[1].fill_between(df_longss['Date'],df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] < 0, color='blue', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with KZ Filter 5')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[1].legend()
# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_longss['Date'], df_longss['Filtered_Value7'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
axs[1].fill_between(df_longss['Date'], df_longss['Filtered_Value7'], where=df_longss['Filtered_Value7'] > 0, color='red', alpha=0.6)
axs[1].fill_between(df_longss['Date'],df_longss['Filtered_Value7'], where=df_longss['Filtered_Value7'] < 0, color='blue', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with KZ Filter 7')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[1].legend()
# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[1].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], smoothed_values2, color='black', label='Smoothed Value')
axs[1].fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 5')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], smoothed_values3, color='black', label='Smoothed Value')
axs[1].fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 7')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], Tsmoothed_values, color='black')
axs[1].fill_between(df_long['Date'], Tsmoothed_values,where=Tsmoothed_values>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values,where=Tsmoothed_values<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 3')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], Tsmoothed_values5, color='black')
axs[1].fill_between(df_long['Date'], Tsmoothed_values5,where=Tsmoothed_values5>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values5,where=Tsmoothed_values5<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 5')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], Tsmoothed_values7, color='black')
axs[1].fill_between(df_long['Date'], Tsmoothed_values7,where=Tsmoothed_values7>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values7,where=Tsmoothed_values7<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 7')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(b['Date'], b['Value'], linestyle='-', color='black')
axs[1].fill_between(b['Date'], b['Value'], where=b['Value'] > 0, color='r', alpha=0.6)
axs[1].fill_between(b['Date'], b['Value'], where=b['Value'] < 0, color='b', alpha=0.6)
axs[1].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'],df_long['5_month_MA'], linestyle='-', color='black')
axs[1].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)
axs[1].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Value')
axs[1].grid(True)
# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(4, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'],df_long['7_month_MA'], linestyle='-', color='black')
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] < 0, color='b', alpha=0.6)
axs[1].set_title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Value')
axs[1].grid(True)
# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')
# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')
# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')
# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(5, 1, figsize=(18, 13))
# Plot the third graph in the third subplot
axs[0].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[0].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[0].set_title('Oceanic Niño Index')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('ONI Value')
# Add gridlines
axs[0].grid(True)
# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'],df_long['7_month_MA'], linestyle='-', color='black')
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] < 0, color='b', alpha=0.6)
axs[1].set_title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Precipitation (mm/hr)')
axs[1].grid(True)
axs[2].plot(df_long['Date'],df_long['5_month_MA'], linestyle='-', color='black')
axs[2].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[2].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)
axs[2].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('Precipitation (mm/hr)')
axs[2].grid(True)
# Plot the second graph in the second subplot
axs[3].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[3].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
axs[3].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
axs[3].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('Precipitation (mm/hr)')
axs[3].grid(True)
axs[4].plot(df_longss['Date'],df_longss['Filtered_Value5'], linestyle='-', color='black')
axs[4].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] > 0, color='r', alpha=0.6)
axs[4].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] < 0, color='b', alpha=0.6)
axs[4].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 5')
axs[4].set_xlabel('Year')
axs[4].set_ylabel('Precipitation (mm/hr)')
axs[4].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(2, 1, figsize=(18, 5))
# Plot the 5-month moving average in the first subplot
axs[0].plot(df_long['Date'],df_long['5_month_MA'], linestyle='-', color='black')
axs[0].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[0].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)
axs[0].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('Value')
axs[0].grid(True)
# Plot the ONI series in the second subplot
axs[1].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[1].set_title('Oceanic Niño Index')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('ONI Value')
# Add gridlines
axs[1].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(2, 1, figsize=(18, 5))
# Plot the KZ-filtered series in the first subplot
axs[0].plot(df_longss['Date'],df_longss['Filtered_Value5'], linestyle='-', color='black')
axs[0].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] > 0, color='r', alpha=0.6)
axs[0].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] < 0, color='b', alpha=0.6)
axs[0].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 5')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('Value')
axs[0].grid(True)
# Plot the ONI series in the second subplot
axs[1].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[1].set_title('Oceanic Niño Index')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('ONI Value')
# Add gridlines
axs[1].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
fig, axs = plt.subplots(2, 1, figsize=(18, 5))
# Plot the Gaussian-smoothed series in the first subplot
axs[0].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[0].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
axs[0].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
axs[0].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('Precipitation (mm/hr)')
axs[0].grid(True)
# Plot the ONI series in the second subplot
axs[1].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')
# Fill the positive and negative areas with different colors
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)
# Adding labels and title
axs[1].set_title('Oceanic Niño Index')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('ONI Value')
# Add gridlines
axs[1].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
p = PDO['PDO'].values
dfss = df_long
dfss['PDO'] = p
dfss.head()
onia = onia[onia['Year'] < 2024]
k = onia['ANOM'].values
dfss['ONI'] = k
r = dmi['PDO'].values  # the DMI series appears to be stored under the column name 'PDO' in this file
dfss['DMI'] = r
s= df_longs['3_month_MA'].values
dfss['3_month_MA'] = s
dfss["triangular_smoothing3"] = Tsmoothed_values
dfss["triangular_smoothing5"] = Tsmoothed_values5
dfss["triangular_smoothing7"] = Tsmoothed_values7
dfss["gaussian_smoothing3"] = smoothed_values1
dfss["gaussian_smoothing5"] = smoothed_values2
dfss["gaussian_smoothing7"] = smoothed_values3
q = df_longss['Filtered_Value3'].values
dfss['KZ_Filter3'] = q
z =df_longss['Filtered_Value5'].values
dfss['KZ_Filter5'] = z
x = df_longss['Filtered_Value7'].values
dfss['KZ_Filter7'] = x
sz = df_long['7_month_MA'].values
dfss['7_month_MA'] = sz
l= df_long['5_month_MA'].values
dfss['5_month_MA'] = l
# Correlation between the 5-month moving average and PDO
dfss['5_month_MA'].corr(dfss['PDO'])
-0.397567276986067
dfss['5_month_MA'].corr(dfss['ONI'])
-0.7714410169199405
dfss['5_month_MA'].corr(dfss['DMI'])
-0.36837887683100584
dfss['5_month_MA'].corr(dfss['PDO'])
-0.397567276986067
dfss['triangular_smoothing3'].corr(dfss['PDO'])
-0.3233588011759541
dfss['triangular_smoothing3'].corr(dfss['ONI'])
-0.6702292059163077
dfss['triangular_smoothing3'].corr(dfss['DMI'])
-0.4306284113519841
print(dfss['triangular_smoothing5'].corr(dfss['PDO']))
print(dfss['triangular_smoothing5'].corr(dfss['ONI']))
print(dfss['triangular_smoothing5'].corr(dfss['DMI']))
-0.3559615975993508
-0.7048183842190328
-0.434688287468563
print(dfss['triangular_smoothing7'].corr(dfss['PDO']))
print(dfss['triangular_smoothing7'].corr(dfss['ONI']))
print(dfss['triangular_smoothing7'].corr(dfss['DMI']))
-0.37753744506375353
-0.7241869895220205
-0.43177332944740954
print(dfss['gaussian_smoothing3'].corr(dfss['PDO']))
print(dfss['gaussian_smoothing3'].corr(dfss['ONI']))
print(dfss['gaussian_smoothing3'].corr(dfss['DMI']))
-0.42156596008633823
-0.7565829698989535
-0.39914548185486776
print(dfss['gaussian_smoothing5'].corr(dfss['PDO']))
print(dfss['gaussian_smoothing5'].corr(dfss['ONI']))
print(dfss['gaussian_smoothing5'].corr(dfss['DMI']))
-0.48106799962223296
-0.7343367545256421
-0.3069307687397145
print(dfss['gaussian_smoothing7'].corr(dfss['PDO']))
print(dfss['gaussian_smoothing7'].corr(dfss['ONI']))
print(dfss['gaussian_smoothing7'].corr(dfss['DMI']))
-0.5264503493640501
-0.6789521697552502
-0.21449888160440037
print(dfss['KZ_Filter3'].corr(dfss['PDO']))
print(dfss['KZ_Filter3'].corr(dfss['ONI']))
print(dfss['KZ_Filter3'].corr(dfss['DMI']))
-0.3992976253520283
-0.7104839786261486
-0.4093174457233314
print(dfss['KZ_Filter5'].corr(dfss['PDO']))
print(dfss['KZ_Filter5'].corr(dfss['ONI']))
print(dfss['KZ_Filter5'].corr(dfss['DMI']))
-0.47341211867694794
-0.7407472307036023
-0.37484337107430393
print(dfss['KZ_Filter7'].corr(dfss['PDO']))
print(dfss['KZ_Filter7'].corr(dfss['ONI']))
print(dfss['KZ_Filter7'].corr(dfss['DMI']))
-0.5253820424791045
-0.7445675829844174
-0.3244072664842779
print(dfss['7_month_MA'].corr(dfss['PDO']))
print(dfss['7_month_MA'].corr(dfss['ONI']))
print(dfss['7_month_MA'].corr(dfss['DMI']))
-0.44005512412229053
-0.7602157168323691
-0.2780914608881651
print(dfss['3_month_MA'].corr(dfss['PDO']))
print(dfss['3_month_MA'].corr(dfss['ONI']))
print(dfss['3_month_MA'].corr(dfss['DMI']))
-0.35261859735597595
-0.7303401686202977
-0.4172781119915262
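The block of repeated prints above can be condensed into a single correlation table; a minimal sketch, assuming dfss already holds the smoothed series and the PDO/ONI/DMI columns exactly as named above:
# Hedged sketch (not part of the original analysis): one table of correlations
# between every smoothed precipitation series and the three climate indices.
smoothed_cols = ['3_month_MA', '5_month_MA', '7_month_MA',
                 'triangular_smoothing3', 'triangular_smoothing5', 'triangular_smoothing7',
                 'gaussian_smoothing3', 'gaussian_smoothing5', 'gaussian_smoothing7',
                 'KZ_Filter3', 'KZ_Filter5', 'KZ_Filter7']
index_cols = ['PDO', 'ONI', 'DMI']
corr_table = dfss[smoothed_cols + index_cols].corr().loc[smoothed_cols, index_cols]
print(corr_table.round(3))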
import scipy.stats as stats
correlation, p_value = stats.pearsonr(dfss['5_month_MA'], dfss['ONI'])
print(f'Correlation between 5-month moving average and ONI: {correlation:.2f}', f'P-value: {p_value}')
Correlation between 5-month moving average and ONI: -0.77 P-value: 5.366513559816488e-51
import scipy.stats as stats
correlation, p_value = stats.pearsonr(dfss['7_month_MA'], dfss['ONI'])
print(f'Correlation between 7-month moving average and ONI: {correlation:.2f}', f'P-value: {p_value}')
Correlation between 7-month moving average and ONI: -0.76 P-value: 9.855421507174027e-49
# Correlation heatmap of all series
import seaborn as sns
dfsss = dfss.drop(['Year', 'index', 'Date'], axis=1)
plt.figure(figsize=(18, 10))
sns.heatmap(dfsss.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()
seasonal = pd.DataFrame()
seasonal["7_month_MA"] = dfss["7_month_MA"].values
print(onia.head())
    SEAS  Year  Total  ANOM  Month       Date
636  DJF  2003  27.51  0.92      1 2003-01-01
637  JFM  2003  27.41  0.63      2 2003-02-01
638  FMA  2003  27.58  0.38      3 2003-03-01
639  MAM  2003  27.56 -0.04      4 2003-04-01
640  AMJ  2003  27.48 -0.26      5 2003-05-01
import pandas as pd
import numpy as np
dates = pd.date_range(start='2003-01-01', end='2023-12-31', freq='M')
precip_data = dfss["Value"].values
# Create DataFrame
df_precip = pd.DataFrame({
'Date': dates,
'Precipitation': precip_data
})
df_precip['7_month_MA'] = df_precip['Precipitation'].rolling(window=7, min_periods=1).mean()
# Extract month from the date
df_precip['Month'] = df_precip['Date'].dt.month
# Calculate the long-term monthly averages
monthly_averages = df_precip.groupby('Month')['7_month_MA'].mean()
# Map the long-term averages back onto the DataFrame using the month
df_precip['Monthly_Average'] = df_precip['Month'].apply(lambda x: monthly_averages[x])
# Calculate anomalies
df_precip['Anomaly'] = df_precip['7_month_MA'] - df_precip['Monthly_Average']
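The climatology lookup above can also be written with groupby/transform, which avoids the apply-with-lambda step; a small sketch under the assumption that df_precip is built exactly as above (the *_alt column names are illustrative):
# Hedged sketch: same monthly climatology and anomaly via transform('mean').
df_precip['Monthly_Average_alt'] = df_precip.groupby('Month')['7_month_MA'].transform('mean')
df_precip['Anomaly_alt'] = df_precip['7_month_MA'] - df_precip['Monthly_Average_alt']
# Anomaly_alt should match the Anomaly column computed above.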
plt.figure(figsize=(12, 6))
plt.plot(df_precip['Date'], df_precip['7_month_MA'], label='7-Month MA', color='blue')
plt.fill_between(df_precip['Date'], df_precip['7_month_MA'], 0, where=df_precip['7_month_MA'] >= 0, color='red', alpha=0.6)
plt.fill_between(df_precip['Date'], df_precip['7_month_MA'], 0, where=df_precip['7_month_MA'] < 0, color='blue', alpha=0.6)
plt.title('7-Month Moving Average Anomalies in Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm/hr)')
plt.axhline(0, color='red', linestyle='--', linewidth=0.8) # Zero anomaly line
plt.legend()
plt.grid(True)
plt.show()
df_precip['7_month_MA'].corr(dfss['ONI'])
-0.7602157168323691
def get_season(month):
if month in [12, 1, 2]:
return 'DJF' # Winter
elif month in [3, 4, 5]:
return 'MAM' # Spring
elif month in [6, 7, 8]:
return 'JJA' # Summer
elif month in [9, 10, 11]:
return 'SON' # Autumn
# Apply the function to create a Season column
df_precip['Season'] = df_precip['Date'].dt.month.apply(get_season)
# Group by year and season and calculate the mean anomaly
seasonal_anomalies1 = df_precip.groupby([df_precip['Date'].dt.year, 'Season'])['Anomaly'].mean().unstack()
print(seasonal_anomalies1.head())
# Reorder the columns so the seasons appear in calendar order: DJF, MAM, JJA, SON
column_names = ["DJF", "MAM", "JJA", "SON"]
seasonal_anomalies = seasonal_anomalies1[column_names]
print(seasonal_anomalies.head())
Season       DJF       JJA       MAM       SON
Date
2003   -0.020808 -0.036521 -0.012247 -0.045926
2004   -0.053504 -0.043280 -0.054735 -0.043186
2005   -0.033881 -0.023153 -0.044261  0.001096
2006   -0.012835  0.003152  0.025943 -0.047733
2007   -0.049259  0.003395 -0.043932  0.002062
Season       DJF       MAM       JJA       SON
Date
2003   -0.020808 -0.012247 -0.036521 -0.045926
2004   -0.053504 -0.054735 -0.043280 -0.043186
2005   -0.033881 -0.044261 -0.023153  0.001096
2006   -0.012835  0.025943  0.003152 -0.047733
2007   -0.049259 -0.043932  0.003395  0.002062
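One caveat with the calendar-year grouping above: December of a given year is averaged with January and February of that same year, not with the following winter. A hedged sketch of an alternative grouping (an assumption about intent, not the author's method; season_year and seasonal_anomalies_alt are illustrative names):
# Hedged sketch: assign December to the following year's DJF before grouping,
# so each DJF value spans Dec(Y-1) through Feb(Y).
season_year = df_precip['Date'].dt.year + (df_precip['Date'].dt.month == 12).astype(int)
seasonal_anomalies_alt = (df_precip.groupby([season_year, 'Season'])['Anomaly']
                          .mean().unstack()[["DJF", "MAM", "JJA", "SON"]])
print(seasonal_anomalies_alt.head())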
# Plot seasonal anomalies
seasonal_anomalies.plot(kind='bar', figsize=(18, 6), width=0.8)
plt.title('Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm/hr)')
plt.legend(title='Season')
plt.grid(axis='y', linestyle='--', linewidth=0.7)
plt.show()
kss = dfss.drop(['Year', 'index','Value','5_month_MA','PDO','DMI','3_month_MA','7_month_MA','triangular_smoothing3','triangular_smoothing5','triangular_smoothing7','gaussian_smoothing3','gaussian_smoothing5','gaussian_smoothing7','KZ_Filter3','KZ_Filter5','KZ_Filter7'], axis=1)
print(kss.head())
kss["ONI"] = dfss["ONI"].values
        Date   ONI
0 2003-01-01  0.92
1 2003-02-01  0.63
2 2003-03-01  0.38
3 2003-04-01 -0.04
4 2003-05-01 -0.26
# Assume df_enso is a DataFrame with ENSO data, where ONI values are monthly and have been processed similarly
# Merge ENSO data
df_full = df_precip.merge(kss, on='Date', how='left')
df_full["ONI"] = dfss["ONI"].values
print(df_full.head())
# Group by season and calculate correlations
correlations = df_full.groupby('Season').apply(lambda x: x['7_month_MA'].corr(x['ONI']))
print(correlations)
        Date  Precipitation  7_month_MA  Month  Monthly_Average   Anomaly  \
0 2003-01-31      -0.008463   -0.008463      1         0.002287 -0.010750
1 2003-02-28       0.031241    0.011389      2         0.003192  0.008198
2 2003-03-31      -0.033798   -0.003673      3         0.002323 -0.005996
3 2003-04-30      -0.020413   -0.007858      4         0.001971 -0.009829
4 2003-05-31      -0.068027   -0.019892      5         0.001024 -0.020916

  Season   ONI
0    DJF  0.92
1    DJF  0.63
2    MAM  0.38
3    MAM -0.04
4    MAM -0.26

Season
DJF   -0.777837
JJA   -0.677940
MAM   -0.683357
SON   -0.812986
dtype: float64
# Assuming 'seasonal_anomalies' is your DataFrame with anomalies by season
autumn_anomalies = seasonal_anomalies['SON'] # Assuming 'SON' is the column for Autumn anomalies
plt.figure(figsize=(12, 6)) # Set the figure size as desired
autumn_anomalies.plot(kind='bar', color='orange') # Use a color that stands out for Autumn
plt.title('Autumn (SON) Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm/hr)')
plt.axhline(0, color='black', linestyle='--', linewidth=1) # Zero anomaly line for reference
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
print(autumn_anomalies)
Date
2003   -0.045926
2004   -0.043186
2005    0.001096
2006   -0.047733
2007    0.002062
2008    0.030507
2009   -0.028871
2010    0.097733
2011   -0.010385
2012   -0.008289
2013    0.043498
2014   -0.027724
2015   -0.093207
2016    0.051357
2017    0.058914
2018   -0.028862
2019   -0.070144
2020    0.057543
2021    0.036540
2022    0.063787
2023   -0.038711
Name: SON, dtype: float64
import matplotlib.pyplot as plt
# Filter the data for Autumn (SON) season
Autumn_ENO = onia[onia['SEAS'] == 'SON']
# Get unique years
unique_years = Autumn_ENO['Year'].unique()
# Plot the data
plt.figure(figsize=(15, 8))
plt.bar(Autumn_ENO['Year'], Autumn_ENO['ANOM'], color='blue')
plt.title('Oceanic Niño Index in Autumn (SON) Seasons')
plt.xlabel('Year')
plt.ylabel('ONI Value')
plt.grid(True)
# Set x-ticks to unique years
plt.xticks(unique_years)
plt.show()
fig, axs = plt.subplots(2, 1, figsize=(18, 10))
# Plot the first graph in the first subplot
axs[0].bar(Autumn_ENO['Year'], Autumn_ENO['ANOM'], color='blue')
axs[0].set_title('Oceanic Niño Index in Autumn (SON) Seasons')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('ONI Value')
axs[0].grid(True)
axs[0].set_xticks(unique_years)
axs[0].tick_params(axis='x', rotation=90)
# Plot the second graph in the second subplot
autumn_anomalies.plot(kind='bar', ax=axs[1], color='orange') # Use a color that stands out for Autumn
axs[1].set_title('Autumn (SON) Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Anomaly (mm/hr)')
axs[1].axhline(0, color='black', linestyle='--', linewidth=1) # Zero anomaly line for reference
axs[1].grid(axis='y', linestyle='--', alpha=0.7)
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Creating a DataFrame
data = pd.DataFrame({
'Date': df_full['Date'],
'ENSO': df_full["ONI"],
'Precipitation_MA7': df_full['7_month_MA']
})
data.set_index('Date', inplace=True)
# 2. Plot the precipitation and ENSO series
plt.figure(figsize=(14, 6))
plt.plot(data.index, data['Precipitation_MA7'], label='7-Month MA Precipitation (mm/hr)')
plt.plot(data.index, data['ENSO'], label='ENSO Index', alpha=0.7)
plt.title('Precipitation (7-Month MA) and ENSO Index (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()
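The two series above sit on very different scales, so the combined plot is dominated by the ENSO index; a hedged sketch of the same comparison on twin y-axes (assumes the data DataFrame from the cell above):
# Hedged sketch: plot precipitation and ENSO on separate y-axes for readability.
fig, ax1 = plt.subplots(figsize=(14, 6))
ax1.plot(data.index, data['Precipitation_MA7'], color='tab:blue', label='7-Month MA Precipitation')
ax1.set_ylabel('Precipitation (mm/hr)', color='tab:blue')
ax1.set_xlabel('Year')
ax2 = ax1.twinx()
ax2.plot(data.index, data['ENSO'], color='tab:red', alpha=0.7, label='ENSO (ONI)')
ax2.set_ylabel('ONI', color='tab:red')
ax1.set_title('7-Month MA Precipitation vs ENSO Index (2003-2023)')
ax1.grid(True)
plt.show()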
# 3. Differencing the data to make it stationary
data['Precipitation_MA7_diff'] = data['Precipitation_MA7'].diff().dropna()
data['ENSO_diff'] = data['ENSO'].diff().dropna()
# Dropping NaN values resulting from differencing
data_diff = data.dropna()
# 4. Re-fitting the SARIMAX model on differenced data
model_diff = SARIMAX(data_diff['Precipitation_MA7_diff'], exog=data_diff[['ENSO_diff']],
order=(1,0,1), seasonal_order=(1,0,1,12))
model_fit_diff = model_diff.fit(disp=False)
# 5. Analyzing the model
# Summary of the model
print(model_fit_diff.summary())
# Generating the residuals plot and ACF/PACF plots for the differenced data
residuals_diff = model_fit_diff.resid
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
plot_acf(residuals_diff, lags=30, ax=ax1)
plot_pacf(residuals_diff, lags=30, ax=ax2)
plt.show()
# Displaying the first few rows of the differenced data
print(data_diff.head())
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency M will be used.
self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency M will be used.
self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
==========================================================================================
Dep. Variable: Precipitation_MA7_diff No. Observations: 251
Model: SARIMAX(1, 0, 1)x(1, 0, 1, 12) Log Likelihood 792.111
Date: Sun, 25 Aug 2024 AIC -1572.223
Time: 12:48:00 BIC -1551.070
Sample: 02-28-2003 HQIC -1563.710
- 12-31-2023
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ENSO_diff -0.0257 0.005 -5.103 0.000 -0.036 -0.016
ar.L1 0.5611 0.192 2.915 0.004 0.184 0.938
ma.L1 -0.3072 0.220 -1.393 0.164 -0.739 0.125
ar.S.L12 -0.3452 0.303 -1.139 0.255 -0.939 0.249
ma.S.L12 0.5098 0.284 1.796 0.072 -0.047 1.066
sigma2 0.0001 1.07e-05 9.896 0.000 8.46e-05 0.000
===================================================================================
Ljung-Box (L1) (Q): 0.82 Jarque-Bera (JB): 0.41
Prob(Q): 0.37 Prob(JB): 0.81
Heteroskedasticity (H): 0.99 Skew: 0.03
Prob(H) (two-sided): 0.96 Kurtosis: 2.81
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
            ENSO  Precipitation_MA7  Precipitation_MA7_diff  ENSO_diff
Date
2003-02-28  0.63           0.011389                0.019852      -0.29
2003-03-31  0.38          -0.003673               -0.015062      -0.25
2003-04-30 -0.04          -0.007858               -0.004185      -0.42
2003-05-31 -0.26          -0.019892               -0.012034      -0.22
2003-06-30 -0.16          -0.032879               -0.012987       0.10
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt
# Load your dataset
# Assuming your DataFrame is named df and has a datetime index
# df = pd.read_csv('your_data.csv', index_col='date', parse_dates=True)
# Filter the data to include only the training period
train = df_full[df_full["Date"]<'2021-01']
# Define the model
model = SARIMAX(train['7_month_MA'],
exog=train['ONI'],
order=(1, 0, 1),
seasonal_order=(1, 0, 1, 12),
enforce_stationarity=False,
enforce_invertibility=False)
# Fit the model
results = model.fit()
# Predict precipitation for the test period 2021-2023
# Filter the data to include only the test period
test = df_full[df_full["Date"] >= '2021-01']
print (test.head())
# Predict the values for the test period
predictions = results.get_forecast(steps=len(test), exog=test['ONI'])
predicted_mean = predictions.predicted_mean
plt.figure(figsize=(14, 6))
plt.plot(df_full['Date'], df_full['7_month_MA'], label='7-Month MA Precipitation (mm/hr)')
plt.plot(train['Date'], train['7_month_MA'], label='Training Data', color='blue')
plt.plot(test['Date'], predicted_mean, label='Predictions', color='red')
plt.title('7-Month MA Precipitation Predictions (2021-2023)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()
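To quantify how well the ONI-driven SARIMAX tracks the held-out months, a short hedged sketch (assumes test and predicted_mean from the cell above; the metric choice mirrors what is done later in this notebook):
# Hedged sketch: RMSE and R² of the exogenous-ONI forecast over the test window.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
rmse = np.sqrt(mean_squared_error(test['7_month_MA'], predicted_mean))
r2 = r2_score(test['7_month_MA'], predicted_mean)
print(f'RMSE: {rmse:.4f}, R²: {r2:.3f}')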
print(test['7_month_MA'])
Date Precipitation 7_month_MA Month Monthly_Average Anomaly \
216 2021-01-31 0.115892 0.057635 1 0.002287 0.055348
217 2021-02-28 0.078640 0.059473 2 0.003192 0.056282
218 2021-03-31 -0.000178 0.056523 3 0.002323 0.054200
219 2021-04-30 -0.058888 0.035828 4 0.001971 0.033857
220 2021-05-31 0.016339 0.029131 5 0.001024 0.028107
Season ONI
216 DJF -1.05
217 DJF -0.93
218 MAM -0.84
219 MAM -0.66
220 MAM -0.48
216    0.057635
217    0.059473
218    0.056523
219    0.035828
220    0.029131
221    0.020640
222    0.023172
223    0.021049
224    0.027064
225    0.028446
226    0.054110
227    0.058054
228    0.054751
229    0.065933
230    0.052875
231    0.034326
232    0.036375
233    0.028434
234    0.032837
235    0.053005
236    0.048436
237    0.063988
238    0.078938
239    0.081712
240    0.070313
241    0.070429
242    0.050743
243    0.034198
244    0.014656
245   -0.003346
246   -0.005922
247   -0.011845
248   -0.028761
249   -0.040912
250   -0.046461
251   -0.059636
Name: 7_month_MA, dtype: float64
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
# Assemble the monthly ENSO and 7-month MA precipitation series
# Create DataFrame
data = pd.DataFrame({
'Date': df_full['Date'],
'ENSO': df_full['ONI'],
'Precipitation': df_full['7_month_MA']
}).set_index('Date')
# Compute the cross-correlation
lags = np.arange(-7, 7)
correlations = []
for lag in lags:
shifted_enso = data['ENSO'].shift(lag).dropna()
aligned_precipitation = data['Precipitation'].shift(-lag).dropna()
common_index = shifted_enso.index.intersection(aligned_precipitation.index)
correlation = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
correlations.append(correlation)
# Plot the cross-correlation
plt.figure(figsize=(10, 5))
plt.plot(lags, correlations, marker='o', linestyle='-', color='b')
plt.title('Cross-Correlation between ENSO and Precipitation')
plt.xlabel('Lag (months)')
plt.ylabel('Correlation Coefficient')
plt.axhline(0, color='black', linewidth=0.5, linestyle='--')
plt.axvline(0, color='black', linewidth=0.5, linestyle='--')
plt.grid(True)
plt.show()
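To read a single number off the cross-correlation curve, a small sketch that picks the lag with the strongest absolute correlation (assumes the lags and correlations lists built above):
# Hedged sketch: lag (in months) at which |correlation| is largest.
import numpy as np
best_idx = int(np.argmax(np.abs(correlations)))
print(f'Strongest correlation {correlations[best_idx]:.3f} at lag {lags[best_idx]} months')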
print(len(shifted_enso))
print(len(aligned_precipitation))
246
246
print(len(df_full['7_month_MA']))
252
data = pd.DataFrame({
'Date': df_full['Date'],
'ENSO': df_full['ONI'],
'Precipitation': df_full['7_month_MA']
}).set_index('Date')
lag = 1
shifted_enso = data['ENSO'].shift(lag).dropna()
aligned_precipitation = data['Precipitation'].shift(-lag).dropna()
common_index = shifted_enso.index.intersection(aligned_precipitation.index)
correlation = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
correlations.append(correlation)
print(correlation)
-0.7738119187015284
data = pd.DataFrame({
'Date': df_full['Date'],
'ENSO': df_full['ONI'],
'Precipitation': df_full['7_month_MA']
}).set_index('Date')
lag = 0
shifted_enso = data['ENSO'].shift(lag).dropna()
aligned_precipitation = data['Precipitation'].shift(-lag).dropna()
common_index = shifted_enso.index.intersection(aligned_precipitation.index)
correlation1 = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
correlations.append(correlation1)
print(correlation1)
-0.7602157168323697
print(correlation-correlation1)
-0.013596201869158775
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import r2_score
# Load the data
data = pd.read_csv('pci.csv', index_col='Date', parse_dates=True)
data_2023 = data[data.index.year == 2023]
print(data_2023.head())
           index  Year     Value
Date
2023-01-01   Jan  2023  0.338154
2023-02-01   Feb  2023  0.419176
2023-03-01   Mar  2023  0.282736
2023-04-01   Apr  2023  0.254354
2023-05-01   May  2023  0.269012
# Split the data into training and testing sets
train_data = data[:'2020']
test_data21 = data[data.index.year == 2021]
test_data22 = data[data.index.year == 2022]
test_data23 = data[data.index.year == 2023]
# Define and fit the SARIMA model
model = SARIMAX(train_data['Value'], order=(3, 0, 8), seasonal_order=(2, 1, 0, 12))
fitted_model = model.fit(disp=False)
# Forecast the 12 months following the training period
forecast = fitted_model.get_forecast(steps=12)
forecast_mean = forecast.predicted_mean
forecast_conf_int = forecast.conf_int()
# Calculate the R² value (the forecast is compared against the 2022 observations here)
r2 = r2_score(test_data22['Value'], forecast_mean)
# Create a DataFrame for comparison
comparison_df = pd.DataFrame({
'Actual': test_data22['Value'],
'Forecasted': forecast_mean
})
# Plot the actual vs forecasted values
plt.figure(figsize=(14, 7))
plt.plot(comparison_df.index, comparison_df['Actual'], marker='o', label='Actual Precipitation', color='red')
plt.plot(comparison_df.index, comparison_df['Forecasted'], marker='x', linestyle='--', label='Forecasted Precipitation', color='green')
plt.title('Actual vs Forecasted Precipitation')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm/hr)')
plt.xticks(comparison_df.index, [date.strftime('%b') for date in comparison_df.index], rotation=45)
plt.legend()
plt.grid(True)
plt.show()
# Print the R² value
print(f'R² value: {r2}')
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency MS will be used.
self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency MS will be used.
self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
R² value: -1.251487641412906
def adf_test(dataset):
dftest = adfuller(dataset, autolag = 'AIC')
print("1. ADF : ",dftest[0])
print("2. P-Value : ", dftest[1])
print("3. Num Of Lags : ", dftest[2])
print("4. Num Of Observations Used For ADF Regression and Critical Values Calculation :", dftest[3])
print("5. Critical Values :")
for key, val in dftest[4].items():
print("\t",key, ": ", val)
adf_test(train_data['Value'])
1. ADF :  -3.880195537739565
2. P-Value :  0.002185529158256005
3. Num Of Lags :  13
4. Num Of Observations Used For ADF Regression and Critical Values Calculation : 202
5. Critical Values :
     1% :  -3.4631437906252636
     5% :  -2.8759570379821047
     10% :  -2.574454682874228
d = pd.DataFrame()
d["diff"] = train_data["Value"].diff(12)
d["diff"].plot()
adf_test(d["diff"].dropna())
print(d["diff"])
1. ADF : -3.4871391650542938
2. P-Value : 0.008323596340899617
3. Num Of Lags : 15
4. Num Of Observations Used For ADF Regression and Critical Values Calculation : 188
5. Critical Values :
1% : -3.465620397124192
5% : -2.8770397560752436
10% : -2.5750324547306476
Date
2003-01-01 NaN
2003-02-01 NaN
2003-03-01 NaN
2003-04-01 NaN
2003-05-01 NaN
...
2020-08-01 0.121426
2020-09-01 0.209610
2020-10-01 0.142927
2020-11-01 0.201913
2020-12-01 0.116186
Name: diff, Length: 216, dtype: float64
from pmdarima import auto_arima
import warnings
warnings.filterwarnings("ignore")
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Plot ACF and PACF
plot_acf(train_data['Value'].diff(12).dropna(), lags=40)
plot_pacf(train_data['Value'].diff(12).dropna(), lags=40)
plt.show()
import itertools
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Define the p, d, q, P, D, Q, m ranges to search over
p = q = P = Q = range(0, 3) # typically, start with small values like 0, 1, 2
d = D = [0, 1]
m = [12] # Monthly data, so seasonality period is 12 months
# Create a list of all possible combinations of parameters
parameters = list(itertools.product(p, d, q, P, D, Q, m))
best_aic = float("inf")
best_params = None
# Iterate over all parameter combinations
for params in parameters:
try:
model = SARIMAX(train_data["Value"],
order=(params[0], params[1], params[2]),
seasonal_order=(params[3], params[4], params[5], params[6])
).fit(disp=False)
if model.aic < best_aic:
best_aic = model.aic
best_params = params
except:
continue
print(f"Best Parameters: {best_params} with AIC: {best_aic}")
Best Parameters: (1, 1, 2, 1, 0, 1, 12) with AIC: -596.1817683487355
stepwise_fit = auto_arima(train_data['Value'], start_p=0, start_q=0,
max_p=4, max_q=4, m=12,
seasonal=True,
d=None, D=1, trace=True,
error_action='ignore', # don't want to know if an order does not work
suppress_warnings=True, # don't want convergence warnings
stepwise=True) # set to stepwise
stepwise_fit.summary()
Performing stepwise search to minimize aic
 ARIMA(0,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=0.64 sec
 ARIMA(0,0,0)(0,1,0)[12] intercept   : AIC=-387.387, Time=0.08 sec
 ARIMA(1,0,0)(1,1,0)[12] intercept   : AIC=-482.037, Time=0.60 sec
 ARIMA(0,0,1)(0,1,1)[12] intercept   : AIC=inf, Time=0.84 sec
 ARIMA(0,0,0)(0,1,0)[12]             : AIC=-388.893, Time=0.05 sec
 ARIMA(1,0,0)(0,1,0)[12] intercept   : AIC=-448.519, Time=0.12 sec
 ARIMA(1,0,0)(2,1,0)[12] intercept   : AIC=-514.842, Time=1.19 sec
 ARIMA(1,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=3.07 sec
 ARIMA(1,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=0.92 sec
 ARIMA(0,0,0)(2,1,0)[12] intercept   : AIC=-472.323, Time=0.74 sec
 ARIMA(2,0,0)(2,1,0)[12] intercept   : AIC=-536.507, Time=2.17 sec
 ARIMA(2,0,0)(1,1,0)[12] intercept   : AIC=-504.196, Time=0.53 sec
 ARIMA(2,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=2.98 sec
 ARIMA(2,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=1.10 sec
 ARIMA(3,0,0)(2,1,0)[12] intercept   : AIC=-536.590, Time=2.91 sec
 ARIMA(3,0,0)(1,1,0)[12] intercept   : AIC=-505.133, Time=0.68 sec
 ARIMA(3,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=3.20 sec
 ARIMA(3,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=1.67 sec
 ARIMA(4,0,0)(2,1,0)[12] intercept   : AIC=-534.605, Time=2.87 sec
 ARIMA(3,0,1)(2,1,0)[12] intercept   : AIC=-534.586, Time=3.32 sec
 ARIMA(2,0,1)(2,1,0)[12] intercept   : AIC=-536.259, Time=3.06 sec
 ARIMA(4,0,1)(2,1,0)[12] intercept   : AIC=-532.613, Time=3.06 sec
 ARIMA(3,0,0)(2,1,0)[12]             : AIC=-538.484, Time=1.03 sec
 ARIMA(3,0,0)(1,1,0)[12]             : AIC=-507.042, Time=0.57 sec
 ARIMA(3,0,0)(2,1,1)[12]             : AIC=inf, Time=3.38 sec
 ARIMA(3,0,0)(1,1,1)[12]             : AIC=inf, Time=1.11 sec
 ARIMA(2,0,0)(2,1,0)[12]             : AIC=-538.391, Time=0.96 sec
 ARIMA(4,0,0)(2,1,0)[12]             : AIC=-536.499, Time=1.76 sec
 ARIMA(3,0,1)(2,1,0)[12]             : AIC=-536.480, Time=1.17 sec
 ARIMA(2,0,1)(2,1,0)[12]             : AIC=-538.152, Time=1.26 sec
 ARIMA(4,0,1)(2,1,0)[12]             : AIC=-535.119, Time=2.18 sec

Best model:  ARIMA(3,0,0)(2,1,0)[12]
Total fit time: 49.254 seconds
| Dep. Variable: | y | No. Observations: | 216 |
|---|---|---|---|
| Model: | SARIMAX(3, 0, 0)x(2, 1, 0, 12) | Log Likelihood | 275.242 |
| Date: | Sun, 25 Aug 2024 | AIC | -538.484 |
| Time: | 12:55:00 | BIC | -518.575 |
| Sample: | 01-01-2003 | HQIC | -530.431 |
| - 12-01-2020 | |||
| Covariance Type: | opg |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| ar.L1 | 0.2698 | 0.059 | 4.542 | 0.000 | 0.153 | 0.386 |
| ar.L2 | 0.3031 | 0.074 | 4.110 | 0.000 | 0.159 | 0.448 |
| ar.L3 | 0.1016 | 0.079 | 1.290 | 0.197 | -0.053 | 0.256 |
| ar.S.L12 | -0.5436 | 0.067 | -8.130 | 0.000 | -0.675 | -0.413 |
| ar.S.L24 | -0.4087 | 0.074 | -5.554 | 0.000 | -0.553 | -0.264 |
| sigma2 | 0.0038 | 0.000 | 9.912 | 0.000 | 0.003 | 0.005 |
| Ljung-Box (L1) (Q): | 0.00 | Jarque-Bera (JB): | 0.01 |
|---|---|---|---|
| Prob(Q): | 0.96 | Prob(JB): | 1.00 |
| Heteroskedasticity (H): | 1.19 | Skew: | -0.01 |
| Prob(H) (two-sided): | 0.48 | Kurtosis: | 3.02 |
# Fit a SARIMAX model using the order suggested by the grid search: (1,1,2)x(1,0,1,12)
model = SARIMAX(train_data['Value'], order=(1, 1,2), seasonal_order=(1, 0, 1, 12))
results = model.fit()
print(results.summary())
# Forecast the 12 months following the training period (2021)
forecast = results.get_forecast(steps=12)
forecast_mean = forecast.predicted_mean
forecast_conf_int = forecast.conf_int()
print(get_forecast(steps=12))  # NameError: get_forecast is a method of the fitted results object (results.get_forecast); see the traceback below
# Calculate the R² value
r2 = r2_score(test_data21['Value'], forecast_mean)
print(f'R² value: {r2}')
# Create a DataFrame for comparison
comparison_df = pd.DataFrame({
'Actual': test_data21['Value'],
'Forecasted': forecast_mean
})
# Plot the actual vs forecasted values for 2021
plt.figure(figsize=(14, 7))
plt.plot(comparison_df.index, comparison_df['Actual'], marker='o', label='Actual Precipitation', color='red')
plt.plot(comparison_df.index, comparison_df['Forecasted'], marker='x', linestyle='--', label='Forecasted Precipitation', color='green')
plt.title('Actual vs Forecasted Precipitation for 2021')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm/hr)')
plt.xticks(comparison_df.index, [date.strftime('%b') for date in comparison_df.index], rotation=45)
plt.legend()
plt.grid(True)
plt.show()
SARIMAX Results
============================================================================================
Dep. Variable: Value No. Observations: 216
Model: SARIMAX(1, 1, 2)x(1, 0, [1], 12) Log Likelihood 304.091
Date: Sun, 25 Aug 2024 AIC -596.182
Time: 12:55:01 BIC -575.958
Sample: 01-01-2003 HQIC -588.010
- 12-01-2020
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.7935 0.099 7.979 0.000 0.599 0.988
ma.L1 -1.4008 0.140 -10.032 0.000 -1.675 -1.127
ma.L2 0.4082 0.135 3.028 0.002 0.144 0.672
ar.S.L12 0.9979 0.011 93.847 0.000 0.977 1.019
ma.S.L12 -0.9432 0.141 -6.711 0.000 -1.219 -0.668
sigma2 0.0034 0.001 6.460 0.000 0.002 0.004
===================================================================================
Ljung-Box (L1) (Q): 0.17 Jarque-Bera (JB): 0.86
Prob(Q): 0.68 Prob(JB): 0.65
Heteroskedasticity (H): 1.05 Skew: -0.07
Prob(H) (two-sided): 0.83 Kurtosis: 2.72
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[103], line 11
      8 forecast_mean = forecast.predicted_mean
      9 forecast_conf_int = forecast.conf_int()
---> 11 print(get_forecast(steps=12))
     12 # Calculate the R² value
     13 r2 = r2_score(test_data21['Value'], forecast_mean)

NameError: name 'get_forecast' is not defined
plt.plot(test_data21['Value'], label='Actual')
plt.plot(forecast_mean, label='Predicted')
plt.plot(test_data22['Value'], label='Actual')
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse = sqrt(mean_squared_error(test_data21['Value'], forecast_mean))
print(f'RMSE: {rmse:.2f}')
RMSE: 0.08
# Assuming lsa is a DataFrame or Series of average precipitation over Indonesia
plt.figure(figsize=(14, 7))
plt.plot(lsa, color='blue', label='LSA Data')
plt.title('Average Precipitation of Indonesia Over Time')
plt.xlabel('Time')
plt.ylabel('LSA Value')
plt.legend()
plt.grid(True)
plt.show()
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Load the dataset
file_path = 'pci.csv'
df = pd.read_csv(file_path)
# Convert 'Date' column to datetime format and set it as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Only keep the 'Value' column for modeling
df = df[['Value']]
# Split the data into training (2003-2020) and test years (2021, 2022, 2023)
train = df[:'2020-12-01']
test = df['2021-01-01':'2021-12-01']
test2 = df['2022-01-01':'2022-12-01']
test1 = df['2023-01-01':]
# Define the SARIMAX model
model = SARIMAX(train['Value'], order=(2, 0, 1), seasonal_order=(1, 1, 1, 12))
# Fit the model
model_fit = model.fit(disp=False)
# Forecast 12 months beyond the training period
forecast = model_fit.get_forecast(steps=len(test2))
predicted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()
# Calculate RMSE for evaluation
rmse = np.sqrt(mean_squared_error(test2['Value'], predicted_values))
print(f'RMSE: {rmse}')
# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(train.index, train['Value'], label='Train')
plt.plot(test.index, test2['Value'], label='Test', color='orange')
plt.plot(test.index, predicted_values, label='Forecast', color='green')
plt.fill_between(test.index,
confidence_intervals.iloc[:, 0],
confidence_intervals.iloc[:, 1], color='k', alpha=.2)
plt.title('SARIMAX Model - Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
# Print the forecasted values and confidence intervals
print('Forecasted Values:')
print(predicted_values)
print('\nConfidence Intervals:')
print(confidence_intervals)
RMSE: 0.071913014053073
Forecasted Values:
2021-01-01 0.375824
2021-02-01 0.347584
2021-03-01 0.340026
2021-04-01 0.316241
2021-05-01 0.301906
2021-06-01 0.290242
2021-07-01 0.240718
2021-08-01 0.195128
2021-09-01 0.213559
2021-10-01 0.255150
2021-11-01 0.311400
2021-12-01 0.391716
Freq: MS, Name: predicted_mean, dtype: float64
Confidence Intervals:
lower Value upper Value
2021-01-01 0.267925 0.483723
2021-02-01 0.234356 0.460811
2021-03-01 0.219016 0.461035
2021-04-01 0.191486 0.440996
2021-05-01 0.174525 0.429288
2021-06-01 0.161224 0.419260
2021-07-01 0.110629 0.370807
2021-08-01 0.064348 0.325908
2021-09-01 0.082332 0.344785
2021-10-01 0.123638 0.386663
2021-11-01 0.179714 0.443086
2021-12-01 0.259919 0.523514
# Plot the 2022 precipitation against the forecasted values
plt.figure(figsize=(14, 7))
plt.plot(test2, color='blue', label='Actual Precipitation (2022)')
plt.plot(predicted_values, color='red', label='Forecasted Precipitation')
plt.title('Precipitation Data Over Time')
plt.xlabel('Time')
plt.ylabel('Precipitation Value')
plt.legend()
plt.grid(True)
plt.show()
# Repeat the forecast and compare against the 2023 values
forecast = model_fit.get_forecast(steps=len(test1))
predicted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()
# Calculate RMSE for evaluation
rmse = np.sqrt(mean_squared_error(test1['Value'], predicted_values))
print(f'RMSE: {rmse}')
# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(train.index, train['Value'], label='Train')
plt.plot(test.index, test1['Value'], label='Test', color='orange')
plt.plot(test.index, predicted_values, label='Forecast', color='green')
plt.fill_between(test.index,
confidence_intervals.iloc[:, 0],
confidence_intervals.iloc[:, 1], color='k', alpha=.2)
plt.title('SARIMAX Model - Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
RMSE: 0.0698779432145898
r21 = r2_score(lsa.values,test1)
r2 = r2_score(predicted_values,test1)
print(f'R² value: {r2}')
print(f'R² value: {r21}')
R² value: -0.3793246262297929
R² value: -0.3149617784509602
data_diff['ENSO_diff']
Date
2003-02-28 -0.29
2003-03-31 -0.25
2003-04-30 -0.42
2003-05-31 -0.22
2003-06-30 0.10
...
2023-08-31 0.25
2023-09-30 0.24
2023-10-31 0.22
2023-11-30 0.14
2023-12-31 0.03
Name: ENSO_diff, Length: 251, dtype: float64
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Load the dataset
file_path = 'pci.csv'
df = pd.read_csv(file_path)
# Convert 'Date' column to datetime format and set it as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Only keep the 'Value' column for modeling
df = df[['Value']]
# Create a synthetic exogenous variable (e.g., a random series that might affect 'Value')
np.random.seed(0)
df['Exogenous'] = np.random.normal(0, 1, len(df))
# Split the data into training (2003-2020) and test years (2021, 2022, 2023)
train = df[:'2020-12-01']
test = df['2021-01-01':'2021-12-01']
test2 = df['2022-01-01':'2022-12-01']
test1 = df['2023-01-01':]
print(len(data_diff))
# Prepare the exogenous variables for train and test sets
exog_train = train[['Exogenous']]
exog_test = test[['Exogenous']]
# Define the ARIMAX model
model = SARIMAX(train['Value'], exog=data_diff[['ENSO_diff']], order=(2, 0, 2), seasonal_order=(1, 0, 1, 12))  # note: data_diff has a month-end index while train has a month-start index, which raises the alignment error below
# Fit the model
model_fit = model.fit(disp=False)
# Forecast for the test period (2021) using the exogenous variables
forecast = model_fit.get_forecast(steps=len(test), exog=exog_test)
predicted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()
# Calculate RMSE for evaluation
rmse = np.sqrt(mean_squared_error(test['Value'], predicted_values))
print(f'RMSE: {rmse}')
# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(train.index, train['Value'], label='Train')
plt.plot(test.index, test['Value'], label='Test', color='orange')
plt.plot(test.index, predicted_values, label='Forecast', color='green')
plt.fill_between(test.index,
confidence_intervals.iloc[:, 0],
confidence_intervals.iloc[:, 1], color='k', alpha=.2)
plt.title('ARIMAX Model - Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
# Print the forecasted values and confidence intervals
print('Forecasted Values:')
print(predicted_values)
print('\nConfidence Intervals:')
print(confidence_intervals)
print(train['Value'].size)  # .size is an attribute, not a method
251
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[464], line 33
     30 exog_test = test[['Exogenous']]
     32 # Define the ARIMAX model
---> 33 model = SARIMAX(train['Value'], exog=data_diff[['ENSO_diff']], order=(2, 0, 2), seasonal_order=(1, 0, 1, 12))
    ...

ValueError: The indices for endog and exog are not aligned
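A hedged sketch of one way around this alignment error (not the author's fix): put the differenced ENSO series on the same month-start index as the training data before building the model; enso_aligned, exog_aligned, endog_aligned and model_aligned are illustrative names.
# Hedged sketch: align the exogenous index with the endog index.
enso_aligned = data_diff['ENSO_diff'].copy()
enso_aligned.index = enso_aligned.index.to_period('M').to_timestamp()  # month-end -> month-start
exog_aligned = enso_aligned.reindex(train.index)        # restrict to the training window
endog_aligned = train['Value'][exog_aligned.notna()]    # drop months with no ENSO value (Jan 2003)
exog_aligned = exog_aligned.dropna()
model_aligned = SARIMAX(endog_aligned, exog=exog_aligned, order=(2, 0, 2), seasonal_order=(1, 0, 1, 12))
model_aligned_fit = model_aligned.fit(disp=False)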
print(train['Value'].size())  # raises TypeError below: .size is an attribute, not a callable
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[465], line 1
----> 1 print(train['Value'].size())

TypeError: 'int' object is not callable
# Plot the actual precipitation for the test period against the predicted values
plt.figure(figsize=(14, 7))
plt.plot(test['Value'], color='blue', label='Actual Precipitation')
plt.plot(predicted_values, color='red', label='Predicted Precipitation')
plt.title('Precipitation Data Over Time')
plt.xlabel('Time')
plt.ylabel('Precipitation Value')
plt.legend()
plt.grid(True)
plt.show()
r2 = r2_score(test['Value'], predicted_values)
r21 = r2_score(test['Value'], lsa.values)
print(f'R² value: {r2}')
print(f'R² value: {r21}')
R² value: 0.13574720534345386
R² value: 0.10433376681855322
k= df["Value"].diff(36)
print(k)
Date
2003-01-01 NaN
2003-02-01 NaN
2003-03-01 NaN
2003-04-01 NaN
2003-05-01 NaN
...
2023-08-01 -0.076554
2023-09-01 -0.128812
2023-10-01 -0.186202
2023-11-01 -0.121039
2023-12-01 -0.121389
Name: Value, Length: 252, dtype: float64
plt.plot(k.dropna())
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import r2_score
from itertools import product
import warnings
# Load the PCI and ONI datasets
pci_data = pd.read_csv('pci.csv', parse_dates=['Date'])
oni_data = pd.read_csv('oni_data.csv')
# Parse the Date column in pci_data
pci_data['Date'] = pd.to_datetime(pci_data['Date'])
# Summarize the PCI data by year
pci_yearly = pci_data.groupby(pci_data['Date'].dt.year)['Value'].mean().reset_index()
# Rename columns for clarity
pci_yearly.columns = ['Year', 'Average_PCI']
# Merge the ONI and PCI data on the Year column
merged_data = pd.merge(pci_yearly, oni_data, on='Year', how='inner')
# Display the first few rows of the merged dataset
merged_data.head()
| Year | Average_PCI | SEAS | Total | ANOM | |
|---|---|---|---|---|---|
| 0 | 2003 | 0.245826 | DJF | 27.51 | 0.92 |
| 1 | 2003 | 0.245826 | JFM | 27.41 | 0.63 |
| 2 | 2003 | 0.245826 | FMA | 27.58 | 0.38 |
| 3 | 2003 | 0.245826 | MAM | 27.56 | -0.04 |
| 4 | 2003 | 0.245826 | AMJ | 27.48 | -0.26 |
# Separate plots for clarity
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 12))
# Plotting Average PCI trends
ax1.plot(pci_yearly['Year'], pci_yearly['Average_PCI'], marker='o', linestyle='-', color='b')
ax1.set_title('Yearly Average PCI Trend')
ax1.set_xlabel('Year')
ax1.set_ylabel('Average PCI')
ax1.grid(True)
# Since ONI data has multiple entries per year (seasonal), we need to handle it differently.
# We will use a line plot to connect the average anomalies of each year.
oni_yearly_avg = merged_data.groupby('Year')['ANOM'].mean().reset_index()
# Plotting ONI trends
ax2.plot(oni_yearly_avg['Year'], oni_yearly_avg['ANOM'], marker='o', linestyle='-', color='r')
ax2.set_title('Yearly Average ONI Anomaly Trend')
ax2.set_xlabel('Year')
ax2.set_ylabel('Average ONI Anomaly')
ax2.grid(True)
plt.tight_layout()
plt.show()
# Calculate the Pearson correlation coefficient between the Average_PCI and the average ONI anomaly
correlation = pci_yearly['Average_PCI'].corr(oni_yearly_avg['ANOM'])
correlation
-0.7870259995699593
# Check available years in both datasets to ensure we have the required data for modeling and testing
available_years_pci = pci_yearly['Year'].unique()
available_years_oni = oni_data['Year'].unique()
available_years_pci, available_years_oni
(array([2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]),
array([1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960,
1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971,
1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982,
1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993,
1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024], dtype=int64))
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
# Prepare the data for training and testing
train_data = merged_data[merged_data['Year'] <= 2020]
test_data = merged_data[merged_data['Year'] >= 2021]
# Features (ONI anomalies) and target (PCI values)
X_train = train_data[['ANOM']]
y_train = train_data['Average_PCI']
X_test = test_data[['ANOM']]
y_test = test_data['Average_PCI']
# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# Predict using the model
predictions = model.predict(X_test)
# Calculate the R² value
r2 = r2_score(y_test, predictions)
r2
0.4124657629085594
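To see what the fitted linear relationship actually looks like, a short hedged sketch that prints the slope/intercept and overlays the regression line on the yearly data (assumes model, X_train/X_test, y_train/y_test and merged_data from above; anom_grid is an illustrative name):
# Hedged sketch: inspect and visualize the ONI -> average-PCI regression.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
print(f'Slope: {model.coef_[0]:.4f}, Intercept: {model.intercept_:.4f}')
anom_grid = pd.DataFrame({'ANOM': np.linspace(merged_data['ANOM'].min(),
                                              merged_data['ANOM'].max(), 100)})
plt.figure(figsize=(8, 5))
plt.scatter(X_train['ANOM'], y_train, color='blue', label='Train (2003-2020)')
plt.scatter(X_test['ANOM'], y_test, color='orange', label='Test (2021-2023)')
plt.plot(anom_grid['ANOM'], model.predict(anom_grid), color='red', label='Fitted line')
plt.xlabel('ONI anomaly (ANOM)')
plt.ylabel('Average PCI')
plt.legend()
plt.grid(True)
plt.show()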